# List of estimates to reanalyze
to_reanalyze <- read.csv("raw-data/rd-papers-vars.csv")

# Merge to get article code
rd_papers <- read.csv("data/cl-rd-papers-meta.csv")
rd_papers <- subset(rd_papers, data_avail, c("estimate_code", "article_code"))
stopifnot(all(sort(rd_papers$estimate_code) == sort(to_reanalyze$estimate_code)))
to_reanalyze <- merge(to_reanalyze, rd_papers, by = "estimate_code")
rm(rd_papers)

# Create path to CSV file
to_reanalyze$csv_file <- with(to_reanalyze, ifelse(subsetting, estimate_code, article_code))
to_reanalyze$csv_file <- paste0("data/cl-replication-data/", to_reanalyze$csv_file, ".csv")
to_reanalyze$subsetting <- to_reanalyze$article_code <- NULL

# Check data
stopifnot(
  all(!is.na(to_reanalyze)),
  all(to_reanalyze[to_reanalyze$type == "sharp", c("fuzzy_var", "cluster_var")] == ""),
  all(to_reanalyze[to_reanalyze$type == "sharp_clust", "fuzzy_var"] == ""),
  all(to_reanalyze[to_reanalyze$type == "sharp_clust", "cluster_var"] != ""),
  all(to_reanalyze[to_reanalyze$type == "fuzzy", "fuzzy_var"] != ""),
  all(to_reanalyze[to_reanalyze$type == "fuzzy", "cluster_var"] == "")
)

# Function to run RDHonest
run_rdhonest <- function(outcome, forcing, cutpoint, fuzzy_var, cluster_var) {
  if (!is.null(cluster_var)) {
    c(
      "rdhonest_coef" = NA,
      "rdhonest_se" = NA,
      "rdhonest_tstat" = NA,
      "rdhonest_pval" = NA
    )
  } else {
    if (is.null(fuzzy_var)) {
      non_na <- !is.na(outcome) & !is.na(forcing)
      outcome <- outcome[non_na]
      forcing <- forcing[non_na]
      estM <- RDHonest::NPR_MROT.fit(
        RDHonest::RDData(data.frame(outcome, forcing), cutpoint)
      )
      RDHonest_res <- RDHonest::RDHonest(
        outcome ~ forcing,
        cutoff = cutpoint,
        M = estM,
        opt.criterion = "MSE"
      )
    } else {
      non_na <- !is.na(outcome) & !is.na(forcing) & !is.na(fuzzy_var)
      outcome <- outcome[non_na]
      forcing <- forcing[non_na]
      fuzzy_var <- fuzzy_var[non_na]
      estM <- RDHonest::NPR_MROT.fit(
        RDHonest::FRDData(data.frame(outcome, fuzzy_var, forcing), cutpoint)
      )
      RDHonest_res <- RDHonest::FRDHonest(
        outcome ~ fuzzy_var | forcing,
        cutoff = cutpoint,
        M = estM,
        opt.criterion = "MSE"
      )
    }
    
    estimate <- RDHonest_res$estimate
    imputed_se <- unname(RDHonest_res$hl) / qnorm(0.975)
    imputed_tstat <- abs(estimate) / imputed_se
    imputed_pval <- 2 * pnorm(-imputed_tstat)
    
    c(
      "rdhonest_coef" = estimate,
      "rdhonest_se" = imputed_se,
      "rdhonest_tstat" = imputed_tstat,
      "rdhonest_pval" = imputed_pval
    )
  }
}

# Function to run re-analysis
run_reanalysis <- function(est_row) {
  
  cat(est_row$estimate_code, "\n")
  
  df <- read.csv(est_row$csv_file)
  
  stopifnot(
    est_row$outcome %in% colnames(df),
    est_row$forcing %in% colnames(df),
    est_row$fuzzy_var == "" || est_row$fuzzy_var %in% colnames(df),
    est_row$cluster_var == "" || est_row$cluster_var %in% colnames(df)
  )

  outcome <- as.numeric(df[, est_row$outcome])
  forcing <- as.numeric(df[, est_row$forcing])
  cutpoint <- as.numeric(est_row$cutpoint)
  fuzzy_var <- if (est_row$fuzzy_var == "") {
    NULL
  } else {
    as.numeric(df[, est_row$fuzzy_var])
  }
  cluster_var <- if (est_row$cluster_var == "") {
    NULL
  } else {
    df[, est_row$cluster_var]
  }

  rdrobust_res <- rdrobust::rdrobust(
    y = outcome,
    x = forcing,
    c = cutpoint,
    fuzzy = fuzzy_var,
    cluster = cluster_var,
    all = TRUE
  )
  
  bw <- rdrobust_res$bws[1,1]
  power_y0_sd <- sd(outcome[forcing >= cutpoint - bw & forcing < cutpoint], na.rm = TRUE)
  
  rdpower_res <- lapply(
    c("power_0p1" = 0.1, "power_0p2" = 0.2, "power_0p5" = 0.5, "power_0p8" = 0.8),
    function(cohensd) {
      capture.output(
        tmp_res <- rdpower::rdpower(
          data = cbind(outcome, forcing),
          cutoff = cutpoint,
          tau = cohensd * power_y0_sd,
          fuzzy = fuzzy_var,
          cluster = cluster_var
        )
      )
      tmp_res$power.rbc
    }
  )
  
  y0_sd <- sd(outcome[forcing < cutpoint], na.rm = TRUE)
  
  extract_res <- function(x, base_name) {
    x <- x[c(1, 3), ]
    stopifnot(all(names(x) == c("Conventional", "Robust")))
    names(x) <- paste0(c("cct_conven_", "cct_robust_"), base_name)
    as.list(x)
  }
  
  c(
    "estimate_code" = est_row$estimate_code,
    extract_res(rdrobust_res$coef, "coef"),
    extract_res(rdrobust_res$se, "se"),
    extract_res(rdrobust_res$z, "tstat"),
    extract_res(rdrobust_res$pv, "pval"),
    run_rdhonest(outcome, forcing, cutpoint, fuzzy_var, cluster_var),
    rdpower_res,
    "y0_sd" = y0_sd
  )
}

#est_row <- to_reanalyze[1, ]
#run_reanalysis(est_row)
#reanalysis_res <- by(to_reanalyze[1:4, ], 1:nrow(to_reanalyze[1:4, ]), run_reanalysis)

reanalysis_res <- by(to_reanalyze, 1:nrow(to_reanalyze), run_reanalysis)
reanalysis_res <- do.call(rbind.data.frame, reanalysis_res)

# Post-processing
reanalysis_res$cct_conven_tstat <- abs(reanalysis_res$cct_conven_tstat)
reanalysis_res$cct_robust_tstat <- abs(reanalysis_res$cct_robust_tstat)

stopifnot(
  with(reanalysis_res, abs(abs(cct_conven_coef) / cct_conven_se - cct_conven_tstat) < 0.00001),
  with(reanalysis_res, abs(abs(cct_robust_coef) / cct_robust_se - cct_robust_tstat) < 0.00001),
  with(reanalysis_res, abs(2 * (1 - pnorm(cct_conven_tstat)) - cct_conven_pval) < 0.00001),
  with(reanalysis_res, abs(2 * (1 - pnorm(cct_robust_tstat)) - cct_robust_pval) < 0.00001)
)

write.csv(reanalysis_res, "data/reanalysis-results.csv", row.names = FALSE)
